import os
import re
import requests

# Novel title to search for; also used as the output .txt filename.
# NOTE(review): read at import time — every function below depends on this global.
keyword = input("请输入需要下载的小说名: ")

    
def get_one_page(url):
    """Fetch *url* and return the response body as text.

    Returns None on a non-200 status code or on any network error.
    """
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36'
        }
        # timeout so a stalled connection cannot hang the whole download
        response = requests.get(url, headers=headers, timeout=10)
        if response.status_code == 200:
            return response.text
        return None
    # Bug fix: bare `RequestException` was never imported, so any network
    # failure raised NameError instead of being handled. Use the fully
    # qualified name from the already-imported `requests` package.
    except requests.exceptions.RequestException:
        return None


#根据传进来的URL获取数据并保存
#保存小说
#根据传进来的URL获取数据并保存
#保存小说
def save_content(html):
    """Parse one chapter page's HTML and append its title and text to the output file.

    Expects *html* to contain an <h1 class="title1"> chapter title and a
    <div id="content"> body made of <p>...</p> paragraphs.
    Raises IndexError if the title pattern is not found.
    """
    title = re.findall('<h1 class="title1">(.*?)</h1>', html, re.S)
    title = title[0]
    write_to_file(title)

    result = re.findall(r'<div id="content">([\s\S]*?)<p><p>', html, re.S)
    text = []
    for sentence in result:
        sentence = sentence.strip()
        # Bug fix: the original used str.strip('<p>') / str.strip('</p>'),
        # which strips *characters* ('<', 'p', '>', '/') from both ends and
        # so could eat legitimate leading/trailing letters of the chapter
        # text. Remove the paragraph tags explicitly instead.
        sentence = re.sub(r'</?p>', '', sentence)
        text.append(sentence)

    for text_ in text:
        write_to_file(text_)
    print(title + '    下载完成')
    write_to_file("\n")

#将数据保存到本地
#将数据保存到本地
def write_to_file(content):
    """Append *content* (plus a trailing newline) to '<keyword>.txt' in UTF-8."""
    filename = str(keyword) + '.txt'
    with open(filename, 'a', encoding='utf-8') as out:
        out.write(content)
        out.write('\n')

#获取章节目录URL并下载各章节
#获取章节目录URL并下载各章节
def get_url(url):
    """Fetch the chapter index at *url*, then download and save every chapter."""
    index_html = get_one_page(url)
    chapter_paths = re.findall(r'<li class="c3"><a href="([\s\S]*?)"><span>', index_html, re.S)
    # Resolve the relative chapter paths against the site root.
    chapter_urls = ['http://quanben5.com' + path for path in chapter_paths]
    print('开始下载' + keyword)
    for chapter_url in chapter_urls:
        chapter_html = get_one_page(chapter_url)
        save_content(chapter_html)

def main():
    """Search quanben5.com for the novel named by `keyword` and download it.

    Takes the first search result's book page, then delegates chapter
    downloading to get_url().
    """
    search_url = 'http://quanben5.com/index.php?c=book&a=search&keywords=' + keyword
    search_html = get_one_page(search_url)
    # First <h3><a href="..."> in the results is taken as the wanted book.
    book_path = re.findall(r'<h3><a href="(.*?)">', search_html, re.S)[0]
    index_url = 'http://quanben5.com' + book_path + '/xiaoshuo.html'
    get_url(index_url)

# Guard the entry point so importing this module does not start a download.
if __name__ == "__main__":
    main()
